Loading in data
# Load the gene annotation table that the rest of the analysis works on.
library(data.table)
# NOTE(review): setwd() ties the script to this directory layout; it is kept so
# the working directory seen by any later code stays the same.
setwd("~/HomeworkWeek6/")
# The file is tab-separated with a header row; both " and ' act as quote
# characters. stringsAsFactors is set explicitly because read.table() stopped
# converting text columns to factors by default in R 4.0.0, while the summary
# and the later re-levelling of `chr` work with factor columns.
mygenes <- read.table(
  file = "gene_dist_head.tsv",
  header = TRUE,
  sep = "\t",
  quote = "\"'",
  stringsAsFactors = TRUE
)
{ library(data.table); setwd("~/HomeworkWeek6/"); mygenes <- read.table(file = 'gene_dist_head.tsv', header = TRUE, sep = "\t", quote = "\"'"); library('ggplot2'); library('plotly') }
# Print a per-column summary of the loaded table (its output is recorded in the
# `##` lines that follow).
summary(mygenes)
## transcript_type feature chr
## protein_coding :2001054 exon :1306656 1 : 238010
## nonsense_mediated_decay: 293471 CDS : 791856 2 : 189916
## processed_transcript : 173401 UTR : 304070 17 : 166529
## retained_intron : 150034 transcript : 215170 19 : 163304
## lincRNA : 55928 stop_codon : 73411 3 : 159475
## antisense : 45811 start_codon: 73358 11 : 157597
## (Other) : 108613 (Other) : 63791 (Other):1753481
## start end
## Min. : 577 Min. : 647
## 1st Qu.: 31698816 1st Qu.: 31700419
## Median : 56565463 Median : 56566763
## Mean : 73148763 Mean : 73152067
## 3rd Qu.:108204790 3rd Qu.:108206944
## Max. :249230780 Max. :249231242
##
Create a vector of autosome names, subset the data to those chromosomes, and plot the new “genes” data frame
library(ggplot2) # We load in libraries
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Names of the autosomes, "1" through "22", in numerical order.
autosomes <- as.character(1:22)

# Keep only the rows whose chromosome is an autosome. If `chr` is a factor, the
# subset still carries the levels of the chromosomes that were dropped.
on_autosome <- mygenes$chr %in% autosomes
genes <- mygenes[on_autosome, ]

# Rebuild `chr` with exactly the autosome levels, so the extra levels disappear
# and the chromosomes sort numerically rather than alphabetically.
genes$chr <- factor(genes$chr, levels = autosomes)

# Stacked bar chart: one bar per chromosome, filled by feature type.
feature_bars <- ggplot(genes) +
  geom_bar(aes(x = chr, fill = feature), width = 1)
feature_bars

# The same chart drawn in polar coordinates.
feature_bars + coord_polar()
library(ggplot2)
library(plotly)
# Build the stacked bar chart (rows per chromosome, filled by feature) and hand
# it to plotly to get an interactive version.
p <- ggplot(genes) +
  geom_bar(aes(x = chr, fill = feature), width = 1)
ggplotly(p)